/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.internal.csv;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.solr.SolrTestCase;

/**
 * CSVParserTest
 *
 * <p>The tests are organized in three different sections: the 'setter/getter' section, the lexer
 * section, and finally the parser section. If a test fails, follow a top-down approach to fix the
 * potential bug (the parser itself is likely to fail if the lexer has problems).
 */
public class CSVParserTest extends SolrTestCase {

  /**
   * {@link CSVParser} subclass used by the lexer tests: it renders each token returned by {@code
   * nextToken()} as a string of the form {@code <type>;<content>;} so it can be compared with
   * {@code assertEquals}.
   */
  static class TestCSVParser extends CSVParser {
    /**
     * Creates a test parser through the single-argument superclass constructor.
     *
     * @param in reader supplying the CSV text
     */
    TestCSVParser(Reader in) {
      super(in);
    }

    /**
     * Creates a test parser through the two-argument superclass constructor.
     *
     * @param in reader supplying the CSV text
     * @param strategy strategy handed to the superclass
     */
    TestCSVParser(Reader in, CSVStrategy strategy) {
      super(in, strategy);
    }

    /**
     * Fetches the next token via {@code super.nextToken()} and renders it as text.
     *
     * @return the numeric token type, a semicolon, the token content and a closing semicolon
     * @throws IOException like {@link CSVParser#nextToken()}
     */
    public String testNextToken() throws IOException {
      Token next = super.nextToken();
      String typePart = Integer.toString(next.type);
      return typePart + ";" + next.content + ";";
    }
  }

  // ======================================================
  //   lexer tests
  // ======================================================

  // Lexer: a single line without comments; the expected tokens carry no surrounding whitespace
  public void testNextToken1() throws IOException {
    String input = "abc,def, hijk,  lmnop,   qrst,uv ,wxy   ,z , ,";
    String[] fieldContents = {"abc", "def", "hijk", "lmnop", "qrst", "uv", "wxy", "z", ""};

    TestCSVParser lexer = new TestCSVParser(new StringReader(input));
    for (String content : fieldContents) {
      assertEquals(CSVParser.TT_TOKEN + ";" + content + ";", lexer.testNextToken());
    }
    // the input stops right after the last delimiter, so an empty EOF token is expected
    assertEquals(CSVParser.TT_EOF + ";;", lexer.testNextToken());
  }

  // Lexer: several lines, including a comment line and empty lines
  public void testNextToken2() throws IOException {
    /*
     * Input, line by line:
     *   1,2,3,
     *   a,b x,c
     *   #foo
     *   (empty line)
     *   d,e,
     *   (empty line)
     */
    CSVStrategy hashComments =
        new CSVStrategy(
            ',',
            '"',
            '#',
            CSVStrategy.ESCAPE_DISABLED,
            true,
            true,
            false,
            true,
            CSVStrategy.DEFAULT_PRINTER_NEWLINE);
    Reader input = new StringReader("1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n");
    TestCSVParser lexer = new TestCSVParser(input, hashComments);

    String[] expectedTokens = {
      CSVParser.TT_TOKEN + ";1;",
      CSVParser.TT_TOKEN + ";2;",
      CSVParser.TT_TOKEN + ";3;",
      CSVParser.TT_EORECORD + ";;",
      CSVParser.TT_TOKEN + ";a;",
      CSVParser.TT_TOKEN + ";b x;",
      CSVParser.TT_EORECORD + ";c;",
      CSVParser.TT_EORECORD + ";;",
      CSVParser.TT_TOKEN + ";d;",
      CSVParser.TT_TOKEN + ";e;",
      CSVParser.TT_EORECORD + ";;",
      // asking again after the end of the input must keep yielding EOF
      CSVParser.TT_EOF + ";;",
      CSVParser.TT_EOF + ";;",
    };
    for (String expected : expectedTokens) {
      assertEquals(expected, lexer.testNextToken());
    }
  }

  // Lexer: backslashes in unquoted values while the strategy uses ESCAPE_DISABLED
  public void testNextToken3() throws IOException {
    /*
     * Input, line by line:
     *   a,\,,b
     *   \,,
     */
    CSVStrategy escapeDisabled =
        new CSVStrategy(
            ',',
            '"',
            '#',
            CSVStrategy.ESCAPE_DISABLED,
            true,
            true,
            false,
            true,
            CSVStrategy.DEFAULT_PRINTER_NEWLINE);
    TestCSVParser lexer = new TestCSVParser(new StringReader("a,\\,,b\n\\,,"), escapeDisabled);

    String[] expectedTokens = {
      CSVParser.TT_TOKEN + ";a;",
      // an unquoted single backslash is not an escape char, it is plain content
      CSVParser.TT_TOKEN + ";\\;",
      CSVParser.TT_TOKEN + ";;",
      CSVParser.TT_EORECORD + ";b;",
      // same expectation on the second line
      CSVParser.TT_TOKEN + ";\\;",
      CSVParser.TT_TOKEN + ";;",
      CSVParser.TT_EOF + ";;",
    };
    for (String expected : expectedTokens) {
      assertEquals(expected, lexer.testNextToken());
    }
  }

  // Lexer: encapsulated values on single lines, with whitespace around the encapsulators
  public void testNextToken4() throws IOException {
    /*
     * Input, line by line:
     *   a,"foo",b
     *   a,   " foo",b
     *   a,"foo "  ,b       (whitespace after the closing encapsulator)
     *   a,  " foo "  ,b
     */
    String input = "a,\"foo\",b\na,   \" foo\",b\na,\"foo \"  ,b\na,  \" foo \"  ,b";
    TestCSVParser lexer = new TestCSVParser(new StringReader(input));

    String[] expectedTokens = {
      CSVParser.TT_TOKEN + ";a;",
      CSVParser.TT_TOKEN + ";foo;",
      CSVParser.TT_EORECORD + ";b;",
      CSVParser.TT_TOKEN + ";a;",
      CSVParser.TT_TOKEN + "; foo;",
      CSVParser.TT_EORECORD + ";b;",
      CSVParser.TT_TOKEN + ";a;",
      CSVParser.TT_TOKEN + ";foo ;",
      CSVParser.TT_EORECORD + ";b;",
      CSVParser.TT_TOKEN + ";a;",
      CSVParser.TT_TOKEN + "; foo ;",
      // the input has no trailing newline, so the final value is expected as EOF, not EORECORD
      CSVParser.TT_EOF + ";b;",
    };
    for (String expected : expectedTokens) {
      assertEquals(expected, lexer.testNextToken());
    }
  }

  // Lexer: encapsulated values that span lines and contain delimiters
  public void testNextToken5() throws IOException {
    String input = "a,\"foo\n\",b\n\"foo\n  baar ,,,\"\n\"\n\t \n\"";
    String[] expectedTokens = {
      CSVParser.TT_TOKEN + ";a;",
      CSVParser.TT_TOKEN + ";foo\n;",
      CSVParser.TT_EORECORD + ";b;",
      CSVParser.TT_EORECORD + ";foo\n  baar ,,,;",
      // the last encapsulated value is followed directly by the end of the input
      CSVParser.TT_EOF + ";\n\t \n;",
    };

    TestCSVParser lexer = new TestCSVParser(new StringReader(input));
    for (String expected : expectedTokens) {
      assertEquals(expected, lexer.testNextToken());
    }
  }

  // Lexer: custom delimiter (';'), encapsulator ('\'') and comment start ('!')
  public void testNextToken6() throws IOException {
    /*
     * Input, line by line:
     *   a;'b and '' more
     *   '
     *   !comment;;;;
     *   ;;
     */
    CSVStrategy customChars = new CSVStrategy(';', '\'', '!');
    Reader input = new StringReader("a;'b and '' more\n'\n!comment;;;;\n;;");
    TestCSVParser lexer = new TestCSVParser(input, customChars);

    // only the first record is checked; the remaining input is not inspected by this test
    String firstValue = lexer.testNextToken();
    assertEquals(CSVParser.TT_TOKEN + ";a;", firstValue);
    String secondValue = lexer.testNextToken();
    assertEquals(CSVParser.TT_EORECORD + ";b and ' more\n;", secondValue);
  }

  // ======================================================
  //   parser tests
  // ======================================================

  /**
   * CSV input shared by {@code testGetLine}, {@code testNextValue} and {@code testGetAllValues}:
   * four records, the last one starting with an encapsulated value that spans several lines.
   * Declared {@code final} so no test can replace the fixture.
   */
  final String code =
      "a,b,c,d\n"
          + " a , b , 1 2 \n"
          + "\"foo baar\", b,\n"
          // quotes inside the encapsulated value are doubled (standard CSV escaping)
          + "   \"foo\n,,\n\"\",,\n\"\"\",d,e\n";

  /** Values the parser tests expect for {@link #code}, one inner array per record. */
  final String[][] res = {
    {"a", "b", "c", "d"},
    {"a", "b", "1 2"},
    {"foo baar", "b", ""},
    {"foo\n,,\n\",,\n\"", "d", "e"}
  };

  /** Reads the shared fixture record by record with {@code getLine()}. */
  public void testGetLine() throws IOException {
    CSVParser parser = new CSVParser(new StringReader(code));
    for (int row = 0; row < res.length; row++) {
      assertArrayEquals(res[row], parser.getLine());
    }
    // after the last record, getLine() is expected to return null
    assertNull(parser.getLine());
  }

  /** Reads the shared fixture value by value with {@code nextValue()}. */
  public void testNextValue() throws IOException {
    CSVParser parser = new CSVParser(new StringReader(code));
    for (int row = 0; row < res.length; row++) {
      for (int col = 0; col < res[row].length; col++) {
        assertEquals(res[row][col], parser.nextValue());
      }
    }
    // after the last value, nextValue() is expected to return null
    assertNull(parser.nextValue());
  }

  /** Reads the shared fixture in one go with {@code getAllValues()}. */
  public void testGetAllValues() throws IOException {
    String[][] parsed = new CSVParser(new StringReader(code)).getAllValues();
    assertEquals(res.length, parsed.length);
    assertTrue(parsed.length > 0);
    int row = 0;
    for (String[] expectedRecord : res) {
      assertArrayEquals(expectedRecord, parsed[row]);
      row++;
    }
  }

  /**
   * Parses CRLF-terminated input with {@code CSVStrategy.EXCEL_STRATEGY}, covering an empty line,
   * doubled quotes and an encapsulated value containing a line feed.
   */
  public void testExcelStrategy1() throws IOException {
    // one literal per input line
    String input =
        "value1,value2,value3,value4\r\n"
            + "a,b,c,d\r\n"
            + "  x,,,\r\n"
            + "\r\n"
            + "\"\"\"hello\"\"\",\"  \"\"world\"\"\",\"abc\ndef\",\r\n";
    String[][] expected = {
      {"value1", "value2", "value3", "value4"},
      {"a", "b", "c", "d"},
      {"  x", "", "", ""},
      {""},
      {"\"hello\"", "  \"world\"", "abc\ndef", ""}
    };

    Reader in = new StringReader(input);
    String[][] actual = new CSVParser(in, CSVStrategy.EXCEL_STRATEGY).getAllValues();
    assertEquals(expected.length, actual.length);
    assertTrue(actual.length > 0);
    for (int row = 0; row < expected.length; row++) {
      assertArrayEquals(expected[row], actual[row]);
    }
  }

  /**
   * Parses input with interleaved empty lines using {@code CSVStrategy.EXCEL_STRATEGY}; each empty
   * line is expected as a record holding one empty value.
   */
  public void testExcelStrategy2() throws Exception {
    String input = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
    String[][] expected = {
      {"foo", "baar"},
      {""},
      {"hello", ""},
      {""},
      {"world", ""}
    };

    CSVParser excelParser = new CSVParser(new StringReader(input), CSVStrategy.EXCEL_STRATEGY);
    String[][] actual = excelParser.getAllValues();
    assertEquals(expected.length, actual.length);
    assertTrue(actual.length > 0);
    int row = 0;
    for (String[] expectedRecord : expected) {
      assertArrayEquals(expectedRecord, actual[row]);
      row++;
    }
  }

  /**
   * Checks that different endings of the last line (CRLF, LF, none, with or without an empty
   * encapsulated value) all produce the same records under {@code CSVStrategy.EXCEL_STRATEGY}.
   */
  public void testEndOfFileBehaviourExcel() throws Exception {
    String[] inputs = {
      "hello,\r\n\r\nworld,\r\n",
      "hello,\r\n\r\nworld,",
      "hello,\r\n\r\nworld,\"\"\r\n",
      "hello,\r\n\r\nworld,\"\"",
      "hello,\r\n\r\nworld,\n",
      "hello,\r\n\r\nworld,",
      "hello,\r\n\r\nworld,\"\"\n",
      "hello,\r\n\r\nworld,\"\""
    };
    String[][] expected = {
      {"hello", ""},
      {""}, // ExcelStrategy does not ignore empty lines
      {"world", ""}
    };

    for (int n = 0; n < inputs.length; n++) {
      Reader in = new StringReader(inputs[n]);
      String[][] actual = new CSVParser(in, CSVStrategy.EXCEL_STRATEGY).getAllValues();
      assertEquals(expected.length, actual.length);
      assertTrue(actual.length > 0);
      for (int row = 0; row < expected.length; row++) {
        assertArrayEquals(expected[row], actual[row]);
      }
    }
  }

  /**
   * Checks that different endings of the last line (CRLF, LF, none, with or without an empty
   * encapsulated value) all produce the same records with the parser's default strategy.
   */
  public void testEndOfFileBehaviorCSV() throws Exception {
    String[] inputs = {
      "hello,\r\n\r\nworld,\r\n",
      "hello,\r\n\r\nworld,",
      "hello,\r\n\r\nworld,\"\"\r\n",
      "hello,\r\n\r\nworld,\"\"",
      "hello,\r\n\r\nworld,\n",
      "hello,\r\n\r\nworld,",
      "hello,\r\n\r\nworld,\"\"\n",
      "hello,\r\n\r\nworld,\"\""
    };
    // CSV Strategy ignores empty lines, so only two records are expected
    String[][] expected = {
      {"hello", ""},
      {"world", ""}
    };

    for (int n = 0; n < inputs.length; n++) {
      String[][] actual = new CSVParser(new StringReader(inputs[n])).getAllValues();
      assertEquals(expected.length, actual.length);
      assertTrue(actual.length > 0);
      for (int row = 0; row < expected.length; row++) {
        assertArrayEquals(expected[row], actual[row]);
      }
    }
  }

  /**
   * Checks that trailing empty lines are reported as records of one empty value under {@code
   * CSVStrategy.EXCEL_STRATEGY}, for both CRLF and LF line endings.
   */
  public void testEmptyLineBehaviourExcel() throws Exception {
    String[] inputs = {
      "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"
    };
    String[][] expected = {
      {"hello", ""},
      {""}, // ExcelStrategy does not ignore empty lines
      {""}
    };

    for (int n = 0; n < inputs.length; n++) {
      Reader in = new StringReader(inputs[n]);
      String[][] actual = new CSVParser(in, CSVStrategy.EXCEL_STRATEGY).getAllValues();
      assertEquals(expected.length, actual.length);
      assertTrue(actual.length > 0);
      int row = 0;
      for (String[] expectedRecord : expected) {
        assertArrayEquals(expectedRecord, actual[row]);
        row++;
      }
    }
  }

  /**
   * Checks that trailing empty lines produce no records with the parser's default strategy, for
   * both CRLF and LF line endings.
   */
  public void testEmptyLineBehaviourCSV() throws Exception {
    String[] inputs = {
      "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"
    };
    // CSV Strategy ignores empty lines, so a single record is expected
    String[][] expected = {{"hello", ""}};

    for (int n = 0; n < inputs.length; n++) {
      String[][] actual = new CSVParser(new StringReader(inputs[n])).getAllValues();
      assertEquals(expected.length, actual.length);
      assertTrue(actual.length > 0);
      for (int row = 0; row < expected.length; row++) {
        assertArrayEquals(expected[row], actual[row]);
      }
    }
  }

  /**
   * Exercises escape-character handling together with an encapsulator.
   *
   * <p>To avoid confusion over the need for escaping chars in Java code, the test uses a forward
   * slash as the escape char and a single quote as the encapsulator.
   *
   * @throws IOException if the parser fails to read the input
   */
  public void testBackslashEscaping() throws IOException {
    String code =
        "one,two,three\n" // 0
            + "'',''\n" // 1) empty encapsulators
            + "/',/'\n" // 2) single encapsulators
            + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
            + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
            + "/,,/,\n" // 5) separator escaped
            + "//,//\n" // 6) escape escaped
            + "'//','//'\n" // 7) escape escaped in encapsulation
            + "   8   ,   \"quoted \"\" /\" // string\"   \n" // 8) don't eat spaces
            + "9,   /\n   \n" // 9) escaped newline
            + "";
    String[][] res = {
      {"one", "two", "three"}, // 0
      {"", ""}, // 1
      {"'", "'"}, // 2
      {"'", "'"}, // 3
      {"'", "'"}, // 4
      {",", ","}, // 5
      {"/", "/"}, // 6
      {"/", "/"}, // 7
      {"   8   ", "   \"quoted \"\" \" / string\"   "}, // 8
      {"9", "   \n   "}, // 9
    };

    CSVStrategy strategy =
        new CSVStrategy(
            ',', '\'', CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true, "\n");

    CSVParser parser = new CSVParser(new StringReader(code), strategy);
    String[][] tmp = parser.getAllValues();
    // Pin the record count like the sibling tests do: without it, surplus records would go
    // unnoticed and missing ones would only show up as an ArrayIndexOutOfBoundsException.
    assertEquals(res.length, tmp.length);
    assertTrue(tmp.length > 0);
    for (int i = 0; i < res.length; i++) {
      assertArrayEquals(res[i], tmp[i]);
    }
  }

  /**
   * Exercises escape-character handling with the encapsulator disabled.
   *
   * <p>To avoid confusion over the need for escaping chars in Java code, a forward slash serves as
   * the escape char; the strategy is built with {@code CSVStrategy.ENCAPSULATOR_DISABLED}.
   */
  public void testBackslashEscaping2() throws IOException {
    String input =
        " , , \n" // 1)
            + " \t ,  , \n" // 2)
            + " // , /, , /,\n"; // 3)
    String[][] expected = {
      {" ", " ", " "}, // 1
      {" \t ", "  ", " "}, // 2
      {" / ", " , ", " ,"}, // 3
    };

    CSVStrategy slashEscapeOnly =
        new CSVStrategy(
            ',',
            CSVStrategy.ENCAPSULATOR_DISABLED,
            CSVStrategy.COMMENTS_DISABLED,
            '/',
            false,
            false,
            true,
            true,
            "\n");

    String[][] actual = new CSVParser(new StringReader(input), slashEscapeOnly).getAllValues();
    assertTrue(actual.length > 0);
    assertTrue(CSVPrinterTest.equals(expected, actual));
  }

  /**
   * Parses the same input twice: once with {@code CSVStrategy.DEFAULT_STRATEGY}, which is asserted
   * to have comments disabled, and once with a strategy that uses '#' as the comment start.
   */
  public void testDefaultStrategy() throws IOException {
    String input =
        "a,b\n" // 1)
            + "\"\n\",\" \"\n" // 2)
            + "\"\",#\n"; // 3)

    // Pass 1: comments disabled, so the '#' is expected back as an ordinary value.
    String[][] expectedWithoutComments = {
      {"a", "b"},
      {"\n", " "},
      {"", "#"},
    };
    CSVStrategy defaults = CSVStrategy.DEFAULT_STRATEGY;
    assertEquals(CSVStrategy.COMMENTS_DISABLED, defaults.getCommentStart());

    String[][] parsed = new CSVParser(new StringReader(input), defaults).getAllValues();
    assertTrue(parsed.length > 0);
    assertTrue(CSVPrinterTest.equals(expectedWithoutComments, parsed));

    // Pass 2: '#' starts a comment, so the last record is expected to hold only the empty value.
    String[][] expectedWithComments = {
      {"a", "b"},
      {"\n", " "},
      {""},
    };
    CSVStrategy hashComments = new CSVStrategy(',', '"', '#');
    parsed = new CSVParser(new StringReader(input), hashComments).getAllValues();
    assertTrue(CSVPrinterTest.equals(expectedWithComments, parsed));
  }

  /**
   * Feeds the parser a value written as six literal backslash-u sequences and expects it to be
   * returned as the word "public".
   */
  public void testUnicodeEscape() throws IOException {
    CSVStrategy unicodeStrategy =
        new CSVStrategy(
            ',',
            '"',
            CSVStrategy.COMMENTS_DISABLED,
            CSVStrategy.ESCAPE_DISABLED,
            true,
            true,
            true,
            true,
            CSVStrategy.DEFAULT_PRINTER_NEWLINE);
    // the doubled backslashes keep javac from decoding the sequences itself
    String input = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";

    String[] fields = new CSVParser(new StringReader(input), unicodeStrategy).getLine();
    assertEquals(2, fields.length);
    assertEquals("abc", fields[0]);
    assertEquals("public", fields[1]);
  }

  /** Expects four records from input whose lines are separated by CRLF. */
  public void testCarriageReturnLineFeedEndings() throws IOException {
    Reader input = new StringReader("foo\r\nbaar,\r\nhello,world\r\n,kanu");
    String[][] records = new CSVParser(input).getAllValues();
    assertEquals(4, records.length);
  }

  /** Expects three records from input that mixes empty LF and CRLF lines with data lines. */
  public void testIgnoreEmptyLines() throws IOException {
    Reader input = new StringReader("\nfoo,baar\n\r\n,\n\n,world\r\n\n");
    String[][] records = new CSVParser(input).getAllValues();
    assertEquals(3, records.length);
  }

  /**
   * Checks that {@code getAllValues()}, {@code getLine()} and {@code nextValue()} agree with each
   * other when three independent parsers read the same input.
   */
  public void testLineTokenConsistency() throws IOException {
    String input = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
    String[][] allValues = new CSVParser(new StringReader(input)).getAllValues();
    CSVParser valueParser = new CSVParser(new StringReader(input));
    CSVParser lineParser = new CSVParser(new StringReader(input));

    for (int row = 0; row < allValues.length; row++) {
      String[] record = allValues[row];
      assertArrayEquals(lineParser.getLine(), record);
      for (int col = 0; col < record.length; col++) {
        assertEquals(valueParser.nextValue(), record[col]);
      }
    }
  }

  // From SANDBOX-153: lexing with CSVStrategy.TDF_STRATEGY on tab-separated input
  public void testDelimiterIsWhitespace() throws IOException {
    Reader input = new StringReader("one\ttwo\t\tfour \t five\t six");
    TestCSVParser lexer = new TestCSVParser(input, CSVStrategy.TDF_STRATEGY);

    // two adjacent tabs are expected to yield an empty value
    String[] leadingValues = {"one", "two", "", "four", "five"};
    for (String value : leadingValues) {
      assertEquals(CSVParser.TT_TOKEN + ";" + value + ";", lexer.testNextToken());
    }
    // no trailing newline, so the last value is expected as the EOF token
    assertEquals(CSVParser.TT_EOF + ";six;", lexer.testNextToken());
  }
}
